plot_heatmaps.ipynb¶
This notebook makes heatmaps of entry and binding data from filtered data
- Written by Brendan Larsen
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
# input configs
altair_config = None
nipah_config = None
# input files
entropy_file = None
func_scores_E2_file = None
binding_E2_file = None
e2_low_func_file = None
func_scores_E3_file = None
binding_E3_file = None
e3_low_func_file = None
# output images
E2_binding_heatmap = None
E3_binding_heatmap = None
E2_entry_heatmap = None
E3_entry_heatmap = None
combined_entry_binding_contact = None
E3_entry_AA_prop_heatmap = None
E2_entry_AA_prop_heatmap = None
glycan_sites_img_heatmap = None
nipah_poly_sites_img_heatmap = None
HENV103_heatmap_plot = None
nAH1_heatmap_plot = None
HENV117_heatmap_plot = None
HENV32_heatmap_plot = None
HENV26_heatmap_plot = None
m102_heatmap_plot = None
In [2]:
# Parameters
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/heatmap_theme.py"
entropy_file = "results/entropy/entropy.csv"
func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"
e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"
E2_binding_heatmap = "results/images/E2_binding_heatmap.html"
E3_binding_heatmap = "results/images/E3_binding_heatmap.html"
E2_entry_heatmap = "results/images/E2_entry_heatmap.html"
E3_entry_heatmap = "results/images/E3_entry_heatmap.html"
combined_entry_binding_contact = (
"results/images/combined_entry_binding_contact_heatmaps.html"
)
E3_entry_AA_prop_heatmap = "results/images/E3_entry_AA_prop_heatmap.html"
E2_entry_AA_prop_heatmap = "results/images/E2_entry_AA_prop_heatmap.html"
glycan_sites_img_heatmap = "results/images/glycan_sites_img_heatmap.html"
nipah_poly_sites_img_heatmap = "results/images/nipah_poly_sites_img_heatmap.html"
HENV103_heatmap_plot = "results/images/HENV103_heatmap_plot.html"
nAH1_heatmap_plot = "results/images/nAH1_heatmap_plot.html"
HENV117_heatmap_plot = "results/images/HENV117_heatmap_plot.html"
HENV32_heatmap_plot = "results/images/HENV32_heatmap_plot.html"
HENV26_heatmap_plot = "results/images/HENV26_heatmap_plot.html"
m102_heatmap_plot = "results/images/m102_heatmap_plot.html"
In [3]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import sys
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()
# setup working directory
if os.getcwd() == "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/":
pass
print("Already in correct directory")
else:
os.chdir("/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/")
print("Setup in correct directory")
#import altair themes from /data/custom_analyses_data/theme.py and enable
sys.path.append('data/custom_analyses_data/')
import heatmap_theme
alt.themes.register('heatmap_theme', heatmap_theme.heatmap_theme)
alt.themes.enable('heatmap_theme')
Setup in correct directory
Out[3]:
ThemeRegistry.enable('heatmap_theme')
In [4]:
if nipah_config is None:
##hard paths in case don't want to run with snakemake
print("loading hard paths")
#altair_config = "data/custom_analyses_data/heatmap_theme.py"
nipah_config = "nipah_config.yaml"
entropy_file = "results/entropy/entropy.csv"
# input files
func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"
func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"
e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"
Read configs¶
In [5]:
with open(nipah_config) as f:
config = yaml.safe_load(f)
In [6]:
# This function helps generate bounds for the heatmaps to make it even
# for start, end in [(71, 204), (204, 337), (337, 470), (470, 603)]
def generate_numbers(start=71, increment=65, final=602, count=8):
numbers = []
for _ in range(count):
next_start = start + increment
numbers.append((start, next_start))
start = next_start
# For the final tuple, the second number is fixed at 603
numbers.append((start, final))
return numbers
# Generating the numbers
generated_numbers = generate_numbers()
print(generated_numbers)
[(71, 136), (136, 201), (201, 266), (266, 331), (331, 396), (396, 461), (461, 526), (526, 591), (591, 602)]
In [7]:
# import filtered data
e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"
e2_binding = pd.read_csv(binding_E2_file).round(2)
e2_func = pd.read_csv(func_scores_E2_file).round(2)
e2_low_func = pd.read_csv(e2_low_func_file).round(2)
e3_binding = pd.read_csv(binding_E3_file).round(2)
e3_func = pd.read_csv(func_scores_E3_file).round(2)
e3_low_func = pd.read_csv(e3_low_func_file).round(2)
e3_low_func_ab = pd.read_csv(e3_low_func_file_ab).round(2)
antibody_escape = pd.read_csv(antibody_file).round(2)
In [8]:
def prepare_distance(distance_file):
# read in residue distance data, calculated in different notebook
df = distance_file[["site", "distance"]]
df = df.dropna(subset=["site"])
df["site"] = df["site"].astype("Int64")
df["distance"] = df["distance"].round(2)
df = df[["site", "distance"]].drop_duplicates()
df["mutant"] = "distance"
df["wildtype"] = ""
df["type"] = "distance"
df.rename(columns={"distance": "value"}, inplace=True)
return df
def prepare_entropy(): # need to prepare entropy data for plotting on heatmap
# read in entropy data, calculated in different notebook
entropy = pd.read_csv(entropy_file)
df = entropy[["site", "henipavirus_entropy"]]
df = df.dropna(subset=["site"])
df["site"] = df["site"].astype("Int64")
df = df.rename(columns={"henipavirus_entropy": "entropy"})
df["entropy"] = df["entropy"].round(2)
df = df[["site", "entropy"]].drop_duplicates()
df["mutant"] = "entropy"
df["wildtype"] = ""
df["type"] = "entropy"
df.rename(columns={"entropy": "value"}, inplace=True)
return df
def make_contact():
df = pd.DataFrame(
{
"site": config["contact_sites"],
"contact": [0.0] * len(config["contact_sites"]),
}
)
# Renaming and restructuring the dataframe
df["mutant"] = "receptor contact"
df["wildtype"] = ""
df["type"] = "receptor contact"
df.rename(columns={"contact": "value"}, inplace=True)
return df
# This gets called during heatmap generation
def make_empty_df(
df,
contact_df=None,
entropy_df=None,
contact_flag=None,
entropy_flag=None,
low_entry_df=None,
binding_flag=None,
escape_flag=None,
distance_df=None,
):
sites = range(71, 603)
amino_acids = [
"R",
"K",
"H",
"D",
"E",
"Q",
"N",
"S",
"T",
"Y",
"W",
"F",
"A",
"I",
"L",
"M",
"V",
"G",
"P",
"C",
]
# Create the combination of each site with each amino acid
data = [{"site": site, "mutant": aa} for site in sites for aa in amino_acids]
# Create the DataFrame
empty_df = pd.DataFrame(data)
if binding_flag:
print("plotting binding")
if low_entry_df is None:
print("You indicated binding but did not provide a low_entry_df")
all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
df_test = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["binding_mean"],
var_name="type",
value_name="value",
)
low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
df_filter = low_entry_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["low_effect"],
var_name="type",
value_name="value",
)
df_test = pd.concat([df_test, df_filter])
elif escape_flag:
print("plotting escape")
if low_entry_df is None:
print("You indicated escape but did not provide a low_entry_df")
all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
df_test = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["escape_mean"],
var_name="type",
value_name="value",
)
low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
df_filter = low_entry_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["low_effect"],
var_name="type",
value_name="value",
)
df_test = pd.concat([df_test, df_filter])
else:
print("plotting entry")
all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
df_test = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["effect"],
var_name="type",
value_name="value",
)
if contact_flag and entropy_flag is None:
df_test = pd.concat([df_test], ignore_index=True)
if contact_flag is True:
df_test = pd.concat([df_test, contact_df], ignore_index=True)
if entropy_flag is True:
df_test = pd.concat([df_test, entropy_df], ignore_index=True)
if entropy_flag and contact_flag is True:
df_test = pd.concat([df_test, entropy_df, contact_df], ignore_index=True)
if distance_df is not None:
print('making a distance df')
df_test = pd.concat([df_test,distance_df], ignore_index=True)
return df_test
In [9]:
# Make the base heatmap. This contains information about the x_axis and heatmap_sites which are important for sorting them correctly.
def make_base_heatmap(df, heatmap_sites, x_axis):
base = (
alt.Chart(df)
.encode(
x=alt.X("site:O", title="Site", sort=heatmap_sites, axis=x_axis),
y=alt.Y(
"mutant",
title="Amino Acid",
sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
axis=alt.Axis(grid=False),
),
)
.properties(
width=alt.Step(10),
height=alt.Step(10),
)
)
return base
# This makes an 'empty' heatmap that shows sites that were not observed as some color (default:gray)
def make_empty_heatmap(base, background_color):
chart_empty = (
base.mark_rect(color=background_color)
.encode(tooltip=["site", "mutant"])
.transform_filter(
((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == "escape_mean"))
& (alt.datum.value == None)
)
)
return chart_empty
# This makes the white squares and X for the wildtype amino acids
def make_wildtype_heatmap(unique_wildtypes_df, strokewidth_size, heatmap_sites):
wildtype_layer_box = (
alt.Chart(unique_wildtypes_df)
.mark_rect(color="white", stroke="#000000", strokeWidth=strokewidth_size)
.encode(
x=alt.X("site:O", sort=heatmap_sites),
y=alt.Y(
"wildtype",
sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
),
tooltip=["site", "wildtype"],
)
.transform_filter(
((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
& (alt.datum.wildtype != None)
& (alt.datum.value != None)
)
)
wildtype_layer = (
alt.Chart(unique_wildtypes_df)
.mark_text(color="black", text="X", size=8, align="center", baseline="middle",dy=1)
.encode(
x=alt.X("site:O", sort=heatmap_sites),
y=alt.Y(
"wildtype",
sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
),
tooltip=["site", "wildtype"],
)
.transform_filter(
((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
& (alt.datum.wildtype != None)
& (alt.datum.value != None)
)
)
return wildtype_layer_box, wildtype_layer
# This makes the actual effect heatmap, and adds a bar for the legend if its the first time through the loop
def create_effect_chart(
base, color_scale_effect, strokewidth_size, legend_title=None, effect_legend=None
):
chart = (
base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size)
.encode(
color=alt.condition(
'(datum.type == "effect" | datum.type == "binding_mean" | datum.type == "escape_mean")',
alt.Color("value:Q", scale=color_scale_effect, legend=effect_legend),
alt.value("transparent"),
),
tooltip=["site", "mutant", "wildtype", "value"],
)
.transform_filter((alt.datum.wildtype != "") & (alt.datum.wildtype != None))
)
return chart
# This makes a chart for the entropy values
def create_entropy_chart(
base, color_scale_entropy, strokewidth_size, legend_title=None, entropy_legend=None
):
chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
color=alt.condition(
'datum.mutant == "entropy"',
alt.Color("value:Q", scale=color_scale_entropy, legend=entropy_legend),
alt.value("transparent"),
),
tooltip=["site", "mutant", "wildtype", "value"],
)
return chart
def create_distance_chart(
base, color_scale_entropy, strokewidth_size, legend_title=None, distance_legend=None
):
chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
color=alt.condition(
'datum.mutant == "distance"',
alt.Color("value:Q", scale=alt.Scale(scheme="purples", domain=[0, 10], reverse=True),legend=distance_legend),
alt.value("transparent"),
),
tooltip=["site", "mutant", "wildtype", "value"],
)
return chart
# This makes a chart for the contact sites
def create_contact_chart(base):
chart_contact = (
base.mark_rect(color="black")
.encode(tooltip=["site"])
.transform_filter((alt.datum.mutant == "receptor contact"))
)
return chart_contact
# Masks sites with low cell entry scores for binding or escape
def make_low_effect_heatmap(base, strokewidth_size, heatmap_sites):
chart_filtered = (
base.mark_rect(color="#808285", stroke="#000000", strokeWidth=strokewidth_size) ##939598
.encode()
.transform_filter(alt.datum.type == "low_effect")
)
return chart_filtered
# This compiles all the different charts and returns a single chart
def compile_chart(
df,
heatmap_sites,
unique_wildtypes_df,
x_axis,
background_color,
color_scale_effect,
color_scale_entropy,
strokewidth_size=None,
legend_title=None,
effect_legend=None,
entropy_legend=None,
binding_flag=None,
escape_flag=None,
distance_df=None,
distance_legend=None
):
base = make_base_heatmap(df, heatmap_sites, x_axis)
chart_empty = make_empty_heatmap(base, background_color)
chart_contact = create_contact_chart(base)
chart_effect = create_effect_chart(
base, color_scale_effect, strokewidth_size, legend_title, effect_legend
)
chart_entropy = create_entropy_chart(
base, color_scale_entropy, strokewidth_size, legend_title, entropy_legend
)
chart_distance = create_distance_chart(
base, color_scale_entropy, strokewidth_size, legend_title, distance_legend
)
wildtype_layer_box, wildtype_layer = make_wildtype_heatmap(
unique_wildtypes_df, strokewidth_size, heatmap_sites
)
if binding_flag:
print("Now making dataframe for binding")
low_entry_heatmap = make_low_effect_heatmap(
base, strokewidth_size, heatmap_sites
)
chart = alt.layer(
chart_empty,
chart_effect,
low_entry_heatmap,
chart_entropy,
chart_contact,
wildtype_layer_box,
wildtype_layer,
).resolve_scale(y="shared", x="shared", color="independent")
elif escape_flag:
print("Now making dataframe for escape")
low_entry_heatmap = make_low_effect_heatmap(
base, strokewidth_size, heatmap_sites
)
chart = alt.layer(
chart_empty,
chart_effect,
low_entry_heatmap,
chart_distance,
chart_entropy,
chart_contact,
wildtype_layer_box,
wildtype_layer,
).resolve_scale(y="shared", x="shared", color="independent")
else:
chart = alt.layer(
chart_empty,
chart_effect,
chart_entropy,
chart_contact,
wildtype_layer_box,
wildtype_layer,
).resolve_scale(y="shared", x="shared", color="independent")
return chart
In [10]:
def plot_entry_heatmap(
df,
legend_title,
null_color=None,
ranges=None,
effect_color=None,
entropy_color=None,
strokewidth_size=None,
custom_y_axis_order=None,
entropy_flag=None,
contact_flag=None,
specific_sites=None,
specific_sites_name=None,
low_entry_df=None,
binding_flag=None,
escape_flag=None,
custom_domain=None,
escape_distance=None,
):
"""
Generates a customizable heatmap for deep mutational scanning (DMS) data visualization.
Parameters:
- df (DataFrame): The data frame containing the data to be visualized. It must include the columns 'site', 'mutant', 'value', and 'wildtype'.
- legend_title (str): The title of the heatmap legend.
- null_color (str, optional): Color for mutants with no observations. Default is 'gray'.
- ranges (list of tuples, optional): Defines the ranges for site wrapping on the heatmap. If not provided, a default range is used.
- effect_color (str, optional): Color scheme for effect values. Default is 'red-blue'.
- entropy_color (str, optional): Color scheme for entropy values. Default is 'purples'.
- strokewidth_size (float, optional): The width of the stroke used in the heatmap. Default size is not specified.
- custom_y_axis_order (list, optional): Specifies a custom order for the y-axis, overriding the default amino acid order.
- entropy_flag (bool, optional): If True, sequence entropy is included in the heatmap. Default is False.
- contact_flag (bool, optional): If True, contact sites are included in the heatmap. Default is False.
- specific_sites (list, optional): Specifies a subset of sites to be plotted. If None, all sites are plotted using wrapping. Default is None.
- specific_sites_name (str, optional): A title to display at the top of the heatmap for specific sites. Default is None.
- low_entry_df (DataFrame,optional): If given, will use different color to show sites with low entry scores (Used for Binding Score Heatmaps)
- binding_flag (bool, optional): If True, will plot binding instead of entry. Must be used with low_entry_df to mask low cell entry mutants
- custom_domain (list, optional): Give custom domain used for coloring. If None, will use default [-4,2.5]
- escape_flag (bool,optional): If True, will plot escape instead of entry. Must be used with low_entry_df to mask low cell entry mutants
- escape_distance (DataFrame, optional): give distance file for residues
Returns:
An Altair chart object representing the generated heatmap. This chart can be further customized or directly displayed in Jupyter notebooks or other compatible environments.
"""
if contact_flag:
contact_df = make_contact()
else:
contact_df = None
if entropy_flag is True:
entropy_df = prepare_entropy()
else:
entropy_df = None
if escape_distance is not None:
distance_df = prepare_distance(escape_distance)
else:
distance_df = None
# Make the dataframes for plotting.
empty_df = make_empty_df(
df,
contact_df=contact_df,
entropy_df=entropy_df,
distance_df=distance_df,
contact_flag=contact_flag,
entropy_flag=entropy_flag,
low_entry_df=low_entry_df,
binding_flag=binding_flag,
escape_flag=escape_flag,
)
# Define the base order list
base_order = [
"R",
"K",
"H",
"D",
"E",
"Q",
"N",
"S",
"T",
"Y",
"W",
"F",
"A",
"I",
"L",
"M",
"V",
"G",
"P",
"C",
]
# Initialize custom_order with custom_y_axis_order or base_order based on custom_y_axis_order's value
custom_order = (
custom_y_axis_order if custom_y_axis_order is not None else base_order
)
# Prepend conditions based on flags
if entropy_flag and contact_flag:
# Both flags are true, prepend both "contact" and "entropy"
custom_order = ["contact", "entropy"] + custom_order
elif entropy_flag:
# Only entropy_flag is true, prepend "entropy"
custom_order = ["entropy"] + custom_order
elif contact_flag:
# Only contact_flag is true, prepend "contact"
custom_order = ["contact"] + custom_order
elif distance_df is not None:
custom_order = ["distance"] + custom_order
# Optional parameters
if null_color is None:
background_color = "#d1d3d4"
else:
background_color = null_color
# Sites for wrapping heatmap correctly
if ranges is None:
full_ranges = [
list(range(start, end))
for start, end in [
(71, 181),
(181, 291),
(291, 401),
(401, 511),
(511, 603),
]
]
else:
full_ranges = ranges
# effect_color
if custom_domain:
if effect_color is None:
color_scale_effect = alt.Scale(
scheme="redblue", domainMid=0, domain=custom_domain
)
else:
color_scale_effect = alt.Scale(
scheme=effect_color, domainMid=0, domain=custom_domain
)
else:
if effect_color is None:
color_scale_effect = alt.Scale(
scheme="redblue", domainMid=0, domain=[-4, 2.5]
)
else:
color_scale_effect = alt.Scale(
scheme=effect_color, domainMid=0, domain=[-4, 2.5]
)
# entropy_color
if entropy_color is None:
color_scale_entropy = alt.Scale(
scheme="purples", domain=[0, 1.5], reverse=True)
else:
color_scale_entropy = alt.Scale(
scheme=entropy_color, domain=[0, 1.5], reverse=True
)
# strokewidth size
if strokewidth_size is None:
strokewidth_size = 0.25
else:
strokewidth_size = strokewidth_size
def determine_sorting_order(df):
# Sort the dataframe by 'site' to ensure that duplicates are detected correctly.
final_df = df.sort_values("site")
sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
final_df["mutant_rank"] = final_df["mutant"].map(sort_order)
# Map the 'mutant' column to these ranks
# Now sort the dataframe by this rank
final_df = final_df.sort_values("mutant_rank")
sites = sorted(final_df["site"].unique(), key=lambda x: float(x))
return final_df, sites, sort_order
heatmap_df, heatmap_sites, sort_order = determine_sorting_order(empty_df)
# container to hold the charts
charts = []
if specific_sites:
# Filter the heatmap to only show certain sites
subset_df = heatmap_df[heatmap_df["site"].isin(specific_sites)]
# Need to do independently for wildtype here for individual sites
unique_wildtypes_df = subset_df.drop_duplicates(
subset=["site", "wildtype"])
unique_wildtypes_df = unique_wildtypes_df.sort_values("site")
sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
unique_wildtypes_df["mutant_rank"] = unique_wildtypes_df["wildtype"].map(
sort_order
)
unique_wildtypes_df = unique_wildtypes_df.sort_values("mutant_rank")
effect_legend = alt.Legend(
title=legend_title,
titleAlign="left",
tickCount=3,
gradientLength=75,
titleAnchor="start",
offset=5,
titlePadding=2,
titleOrient='top',
gradientThickness=10,
)
entropy_legend = (
alt.Legend(title="Henipavirus Entropy", values=[0, 1, 2])
if entropy_flag is True
else None
)
# Setup x-axis labeling
x_axis = alt.Axis(
labelAngle=-90,
title="Site",
labels=True,
)
# Run the main heatmap compiler function
chart = compile_chart(
subset_df,
heatmap_sites,
unique_wildtypes_df,
x_axis,
background_color,
color_scale_effect=color_scale_effect,
color_scale_entropy=color_scale_entropy,
strokewidth_size=strokewidth_size,
legend_title=legend_title,
effect_legend=effect_legend,
entropy_legend=entropy_legend,
binding_flag=binding_flag,
escape_flag=escape_flag,
)
# Since this is a single chart, I don't know why I need to do this, but I seem to get errors if I don't append and then do alt.vconcat below. I get why I need to do this for multiple heatmaps in a for loop, but not here. Leaving in.
charts.append(chart)
if specific_sites_name:
specific_sites_name = specific_sites_name
else:
specific_sites_name = ""
combined_charts = alt.vconcat(*charts, title=specific_sites_name).resolve_scale(
y="shared", x="shared", color="shared"
)
return combined_charts
else:
for idx, subset in enumerate(full_ranges):
# Flags for showing the legend only the first time
subset_df = heatmap_df[
heatmap_df["site"].isin(subset)
] # for the wrapping of sites
unique_wildtypes_df = subset_df.drop_duplicates(
subset=["site", "wildtype"]
) # for the wildtype mapping
# Keep track of where in the loop we are for plotting
is_last_plot = idx == len(full_ranges) - 1
effect_legend = (
alt.Legend(
title=legend_title,
direction="horizontal",
gradientLength=100,
titleAnchor="middle",
tickCount=3,
labelAlign="left",
titleAlign='center',
)
if is_last_plot
else None
)
entropy_legend = (
alt.Legend(
title="Henipavirus Entropy",
direction="horizontal",
gradientLength=100,
titleAnchor="middle",
values=[0, 1, 2],
labelAlign="center",
)
if is_last_plot and entropy_flag is True
else None
)
if distance_df is not None:
distance_legend = (
alt.Legend(
title="Antibody distance",
direction="horizontal",
gradientLength=100,
titleAnchor="middle",
labelAlign="center",
)
if is_last_plot is True
else None
)
else:
distance_legend = None
x_axis = alt.Axis(
labelAngle=-90,
labelExpr="datum.value % 10 === 0 ? datum.value : ''",
title="Site" if is_last_plot else None,
labels=True,
)
chart = compile_chart(
subset_df,
heatmap_sites,
unique_wildtypes_df,
x_axis,
background_color,
color_scale_effect=color_scale_effect,
color_scale_entropy=color_scale_entropy,
strokewidth_size=strokewidth_size,
legend_title=legend_title,
effect_legend=effect_legend,
entropy_legend=entropy_legend,
distance_legend=distance_legend,
binding_flag=binding_flag,
escape_flag=escape_flag
)
charts.append(chart)
combined_chart = alt.vconcat(
*charts, spacing=3, title=alt.Title(f"{legend_title}",subtitle=['Interactive heatmap of Nipah RBP mutational effects relative to the unmutated reference sequence'])
).resolve_scale(y="shared", x="independent", color="shared").configure_axisY(titlePadding=-50)
return combined_chart
Now that we have heatmap function setup, make heatmaps:¶
First, do binding heatmaps¶
In [11]:
E2_binding_heatmap_full = plot_entry_heatmap(
df=e2_binding,
legend_title="bEFNB2 Binding",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
low_entry_df=e2_low_func,
binding_flag=True,
custom_domain=[-6, 2.5],
)
E2_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
E2_binding_heatmap_full.save(E2_binding_heatmap)
plotting binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding
In [12]:
E3_binding_heatmap_full = plot_entry_heatmap(
df=e3_binding,
legend_title="bEFNB3 Binding",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
low_entry_df=e3_low_func,
binding_flag=True,
custom_domain=[-2, 2],
)
E3_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
E3_binding_heatmap_full.save(E3_binding_heatmap)
plotting binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding Now making dataframe for binding
Then, make entry heatmaps:¶
In [13]:
E2_entry_heatmap_full = plot_entry_heatmap(
df=e2_func,
legend_title="CHO-bEFNB2 Entry",
null_color=config["background_color"],
effect_color=config["effect_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
)
E2_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
E2_entry_heatmap_full.save(E2_entry_heatmap)
plotting entry
In [14]:
E3_entry_heatmap_full = plot_entry_heatmap(
df=e3_func,
legend_title="CHO-bEFNB3 Entry",
null_color=config["background_color"],
effect_color=config["effect_color"],
strokewidth_size=config["strokewidth_size"],
contact_flag=True,
)
E3_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
E3_entry_heatmap_full.save(E3_entry_heatmap)
plotting entry
Then, make entry and binding heatmaps for contact sites:¶
Make heatmaps by amino acid property for binding¶
In [15]:
hydrophobic_AA = ["A", "V", "L", "I", "M"]
aromatic_AA = ["Y", "W", "F"]
positive_AA = ["K", "R", "H"]
negative_AA = ["E", "D"]
hydrophilic_AA = ["S", "T", "N", "Q"]
special_AA = ["C","G","P"]
def find_aa_wildtype_sites(df, aa_type):
aa_list = list(df[df["wildtype"].isin(aa_type)]["site"].unique())
return aa_list
def make_AA_lists(df):
hydrophobic_AA_list = find_aa_wildtype_sites(df, hydrophobic_AA)
aromatic_AA_list = find_aa_wildtype_sites(df, aromatic_AA)
positive_AA_list = find_aa_wildtype_sites(df, positive_AA)
negative_AA_list = find_aa_wildtype_sites(df, negative_AA)
hydrophilic_AA_list = find_aa_wildtype_sites(df, hydrophilic_AA)
special_AA_list = find_aa_wildtype_sites(df, special_AA)
all_AA = [
hydrophobic_AA_list,
aromatic_AA_list,
positive_AA_list,
negative_AA_list,
hydrophilic_AA_list,
special_AA_list,
]
return all_AA
AA_names = ["Hydrophobic", "Aromatic", "Positive", "Negative", "Hydrophilic","Special"]
def make_aa_property_charts(
df, df_low, sites_list, sites_name, custom_domain, legend_name
):
empty_charts = []
for aa_type, name in zip(sites_list, sites_name):
aa_property = plot_entry_heatmap(
df=df,
legend_title=legend_name,
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=aa_type,
specific_sites_name=name,
low_entry_df=df_low,
binding_flag=True,
custom_domain=custom_domain,
)
empty_charts.append(aa_property)
combined_chart = alt.vconcat(*empty_charts, spacing=3)
return combined_chart
E2_binding_AA_prop = make_aa_property_charts(
e2_binding, e2_low_func, make_AA_lists(e2_binding), AA_names, [-6, 2.5], "bEFNB2 Binding"
)
E2_binding_AA_prop.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding
In [16]:
E3_binding_AA_prop = make_aa_property_charts(
e3_binding, e3_low_func, make_AA_lists(e3_binding), AA_names, [-2, 2], "bEFNB3 Binding"
)
E3_binding_AA_prop.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding
Now entry by amino acid property¶
In [17]:
def make_aa_property_charts_entry(
df, sites_list, sites_name, legend_name
):
empty_charts = []
for aa_type, name in zip(sites_list, sites_name):
aa_property = plot_entry_heatmap(
df=df,
legend_title=legend_name,
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=aa_type,
specific_sites_name=name,
contact_flag = True,
)
empty_charts.append(aa_property)
combined_chart = alt.vconcat(*empty_charts, spacing=3)
return combined_chart
In [18]:
E2_entry_AA_prop = make_aa_property_charts_entry(
e2_func, make_AA_lists(e2_func),AA_names, "CHO-bEFNB2 entry"
).configure_title(offset=5)
E2_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
E2_entry_AA_prop.save(E2_entry_AA_prop_heatmap)
plotting entry plotting entry plotting entry plotting entry plotting entry plotting entry
In [19]:
E3_entry_AA_prop = make_aa_property_charts_entry(
e3_func, make_AA_lists(e2_func), AA_names, "CHO-bEFNB3 entry"
).configure_title(offset=5)
E3_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
E3_entry_AA_prop.save(E3_entry_AA_prop_heatmap)
plotting entry plotting entry plotting entry plotting entry plotting entry plotting entry
Now for specific sites I want to focus on¶
In [20]:
def make_entry_binding_heatmaps_for_specific_sites(title,sites_list):
specific_sites_E2_binding = plot_entry_heatmap(
df=e2_binding,
legend_title="B2",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
low_entry_df=e2_low_func,
binding_flag=True,
custom_domain=[-6, 2.5],
)
specific_sites_E3_binding = plot_entry_heatmap(
df=e3_binding,
legend_title="B3",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
low_entry_df=e3_low_func,
binding_flag=True,
custom_domain=[-2, 2],
)
specific_sites_E2_entry = plot_entry_heatmap(
df=e2_func,
legend_title="B2",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
)
specific_sites_E3_entry = plot_entry_heatmap(
df=e3_func,
legend_title="B3",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
specific_sites=sites_list,
)
combo_binding_img = alt.vconcat(specific_sites_E2_binding,specific_sites_E3_binding).properties(title='Binding')
combo_entry_img = alt.vconcat(specific_sites_E2_entry,specific_sites_E3_entry).properties(title='Entry')
combo_combo = alt.hconcat(combo_entry_img,combo_binding_img).properties(title=title).configure_title(offset=15)
return combo_combo
In [21]:
dimer_assm_chainA = make_entry_binding_heatmaps_for_specific_sites('ChainA Dimer Interface',[160, 162, 163, 164, 165, 166, 169, 171, 205, 206, 208, 247, 249, 256, 258, 259, 260, 261, 266, 267, 268, 270, 323, 324, 325, 331])
dimer_assm_chainA.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [22]:
dimer_assm_chainB = make_entry_binding_heatmaps_for_specific_sites('ChainB Dimer Interface',[167, 168, 170, 171, 172, 202, 203, 204, 205, 208, 210, 212, 237, 238, 239, 240, 242, 258, 584, 589, 591])
dimer_assm_chainB.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [23]:
BLI_mutant_sites = make_entry_binding_heatmaps_for_specific_sites('Sites Chosen for BLI',[244,305,492,507,530,553,555,559,588])
BLI_mutant_sites.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [24]:
contact_sites_img = make_entry_binding_heatmaps_for_specific_sites('Contact Sites',config['contact_sites'])
contact_sites_img.display()
if combined_entry_binding_contact is not None:
contact_sites_img.save(combined_entry_binding_contact)
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [25]:
stalk_sites_img = make_entry_binding_heatmaps_for_specific_sites('Stalk',list(range(72, 148)))
stalk_sites_img.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [26]:
neck_sites_img = make_entry_binding_heatmaps_for_specific_sites('Neck',list(range(148, 166)))
neck_sites_img.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [27]:
beta_sheet_sites_img = make_entry_binding_heatmaps_for_specific_sites('Beta Sheets',config['beta_sheet'])
beta_sheet_sites_img.display()
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [28]:
nipah_poly_sites_img = make_entry_binding_heatmaps_for_specific_sites('Nipah polymorphic sites',config['nipah_poly'])
nipah_poly_sites_img.display()
if combined_entry_binding_contact is not None:
nipah_poly_sites_img.save(nipah_poly_sites_img_heatmap)
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
In [29]:
glycan_sites_img = make_entry_binding_heatmaps_for_specific_sites('Glycosylation sites',config['glycans'])
glycan_sites_img.display()
if combined_entry_binding_contact is not None:
glycan_sites_img.save(glycan_sites_img_heatmap)
plotting binding Now making dataframe for binding plotting binding Now making dataframe for binding plotting entry plotting entry
Make antibody escape heatmaps¶
In [30]:
m102 = antibody_escape[antibody_escape['ab'] == 'm102.4']
m102_dist = pd.read_csv("results/distances/df_m102_atomic_distances.csv")
HENV26 = antibody_escape[antibody_escape['ab'] == 'HENV-26']
HENV26_dist = pd.read_csv("results/distances/df_HENV26_atomic_distances.csv")
HENV32 = antibody_escape[antibody_escape['ab'] == 'HENV-32']
HENV32_dist = pd.read_csv("results/distances/df_HENV32_atomic_distances.csv")
nAH1 = antibody_escape[antibody_escape['ab'] == 'nAH1.3']
nAH1_dist = pd.read_csv("results/distances/df_nAH_atomic_distances.csv")
HENV117 = antibody_escape[antibody_escape['ab'] == 'HENV-117']
HENV103 = antibody_escape[antibody_escape['ab'] == 'HENV-103']
def make_escape_heatmaps(df,dist,title):
escape_heatmap = plot_entry_heatmap(
df=df,
legend_title=f"Escape from {title}",
null_color=config["background_color"],
effect_color=config["effect_color"],
entropy_color=config["entropy_color"],
strokewidth_size=config["strokewidth_size"],
low_entry_df=e3_low_func_ab,
escape_flag=True,
custom_domain=[-2, 2],
contact_flag=False,
escape_distance = dist,
)
escape_heatmap = escape_heatmap.configure_axisY(titlePadding=-10)
return escape_heatmap
In [31]:
m102_heatmap = make_escape_heatmaps(m102,m102_dist,'m102.4')
m102_heatmap.save(m102_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [32]:
HENV26_heatmap = make_escape_heatmaps(HENV26,HENV26_dist,'HENV-26')
HENV26_heatmap.save(HENV26_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [33]:
HENV32_heatmap = make_escape_heatmaps(HENV32,HENV32_dist,'HENV-32')
HENV32_heatmap.save(HENV32_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [34]:
nAH1_heatmap = make_escape_heatmaps(nAH1,nAH1_dist,'nAH1.3')
nAH1_heatmap.save(nAH1_heatmap_plot)
plotting escape making a distance df Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [35]:
HENV117_heatmap = make_escape_heatmaps(HENV117,dist=None,title='HENV-117')
HENV117_heatmap = HENV117_heatmap.configure_axisY(titlePadding=4)
HENV117_heatmap.save(HENV117_heatmap_plot)
plotting escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [36]:
HENV103_heatmap = make_escape_heatmaps(HENV103,dist=None,title='HENV-103')
HENV103_heatmap = HENV103_heatmap.configure_axisY(titlePadding=4)
HENV103_heatmap.save(HENV103_heatmap_plot)
plotting escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape Now making dataframe for escape
In [ ]: